# A “Watershed” Moment for Collaboration: 
# The Impacts of Legislation and Network Governance on Environmental Outcomes
# Yixin Liu, Jiho Kim, Tina Nabatchi 2025
# Public Administration Review
# Replication file for regression discontinuity analysis

library(dplyr)
library(tidyr)
library(ggplot2)
library(cowplot)
library(egg)
library(rdrobust)
library(rddensity)
library(sandwich)
library(lmtest)
library(stargazer)

####function####
element_textbox <- function(...) {
  el <- element_text(...)
  class(el) <- c("element_textbox", class(el))
  el
}

element_grob.element_textbox <- function(element, ...) {
  text_grob <- NextMethod()
  rect_grob <- element_grob(calc_element("strip.background", theme_bw()))
  
  ggplot2:::absoluteGrob(
    grid::gList(
      element_grob(calc_element("strip.background", theme_bw())),
      text_grob
    ),
    height = grid::grobHeight(text_grob), 
    width = grid::unit(1, "npc")
  )
}

####Data####
# Setup working directory
WD = getwd()
setwd(WD)
df <- read.csv("df_for_rd.csv")

# date
df$date1 = as.Date.character(df$Sample.Date,"%m/%d/%Y")
df = df %>% 
  separate(date1, sep="-", into = c("year", "month", "day"))

# Convert Sample.Date to date type
df$date = as.Date(df$Sample.Date,"%m/%d/%Y")

# Create the lagged dependent variable within each Station.ID
df <- df %>%
  group_by(Station.ID) %>%
  dplyr::mutate(lag_wqi = c(NA, head(wqi, -1))) %>%
  ungroup()

####Figure 2####
df_annual<-df %>%
  group_by(year) %>%
  dplyr::summarise(wqi=mean(wqi))
df_annual$year<-as.character(df_annual$year)

plot_figure2 <- ggplot(df_annual, aes(x=year, y=wqi, group = 1)) +
  geom_hline(yintercept=80, linetype="solid", color = "black", size=1.2) +
  geom_point(size = 3, color = "#56B4E9") +
  geom_line(color = "#56B4E9") +
  labs(title = "Historical Trend of Oregon Water Quality Index", 
       y = "Oregon Water Quality Index", x = "Year") +
  geom_vline(xintercept="1997", linetype="dashed", color = "#CC6666", size=1.2) +
  annotate("text", x = "1996", y = 80, label = "OWQI ≥ 80: Fair or better performance", vjust = -0.5, hjust = 1, size = 4) +
  theme_bw() +
  theme(plot.title = element_textbox(
    hjust = 0.5, margin = margin(t = 5, b = 5)),
    axis.text.x = element_text(angle = -45, hjust = 0),
    axis.text.y = element_text(size = 12))
plot_figure2
ggsave(file = "figure2.jpg", width = 8, height = 4, dpi = 666)
ggsave(file = "figure2.eps", width = 8, height = 4, dpi = 666)

####Table 1####
# cutoff point
df$diff_dates<-as.integer(difftime(df$date, "1997-01-01", units = "days"))
df$law<-ifelse(df$diff_dates>0,"Post-OPSW","Pre-OPSW")

df <- df %>% 
  mutate(D = ifelse(diff_dates < 0, 0, 1),  # Create binary treatment indicator
         x_right = D * diff_dates)

#####Model 1####
# optimal bandwidth
bws <- rdbwselect(df$wqi, df$diff_dates, p=1, c = 0,
                  kernel = "tri", bwselect="mserd", cluster = df$huc8,
                  covs = df$forest + df$lag_wqi)

bws$bws

df$bandwidth <- bws$bws[1]

df$kw <- 1-(abs(0 - df$diff_dates))/df$bandwidth

df$kw[df$diff_dates > (0 + df$bandwidth) | 
        df$diff_dates < (0 - df$bandwidth)] <- 0

# regression
rd_optimal <- lm(wqi ~ D + diff_dates + x_right + forest + lag_wqi, data = df[df$kw > 0,])
summary(rd_optimal)
# cluster SE at watershed level
rd_optimal_cluster <- coeftest(rd_optimal, vcov. = vcovHC(rd_optimal, type = "HC1", 
                                                          cluster = df[df$kw > 0,]$huc8))
rd_optimal_cluster
mean_optimal=mean(df[df$kw > 0,]$wqi[df[df$kw > 0,]$diff_dates < 0])

#####Model 2####
# 1-year bandwidth
rd_1year <- lm(wqi ~ D + diff_dates + x_right + forest + lag_wqi, 
          data = df[df$diff_dates<=365 & df$diff_dates>=-365,])
summary(rd_1year)
# cluster SE at watershed level
rd_1year_cluster <- coeftest(rd_1year, vcov. = vcovHC(rd_1year, type = "HC1", 
                                                      cluster = df[df$diff_dates<=365 & df$diff_dates>=-365,]$huc8))
rd_1year_cluster
mean_1year=mean(df[df$diff_dates<=365 & df$diff_dates>=-365,]$wqi[df[df$diff_dates<=365 & df$diff_dates>=-365,]$diff_dates < 0])

#####Model 3####
# half-year bandwidth
rd_0.5year <- lm(wqi ~ D + diff_dates + x_right + forest + lag_wqi, 
          data = df[df$diff_dates<=180 & df$diff_dates>=-180,])
summary(rd_0.5year)
# cluster SE at watershed level
rd_0.5year_cluster <- coeftest(rd_0.5year, vcov. = vcovHC(rd_0.5year, type = "HC1", 
                                                          cluster = df[df$diff_dates<=180 & df$diff_dates>=-180,]$huc8))
rd_0.5year_cluster
mean_0.5year=mean(df[df$diff_dates<=180 & df$diff_dates>=-180,]$wqi[df[df$diff_dates<=180 & df$diff_dates>=-180,]$diff_dates < 0])

#####Model 4####
# full sample
rd_global <- lm(wqi ~ D + diff_dates + x_right + forest + lag_wqi, data = df)
summary(rd_global)
# cluster SE at watershed level
rd_global_cluster <- coeftest(rd_global, vcov. = vcovHC(rd_global, type = "HC1", 
                                                        cluster = df$huc8))
rd_global_cluster
mean_global=mean(df$wqi[df$diff_dates < 0])

#####Output####
stargazer(rd_optimal,rd_1year,rd_0.5year,rd_global,
          style = "apsr",
          se=list(rd_optimal_cluster[,2],rd_1year_cluster[,2],rd_0.5year_cluster[,2],rd_global_cluster[,2]),
          p=list(rd_optimal_cluster[,4],rd_1year_cluster[,4],rd_0.5year_cluster[,4],rd_global_cluster[,4]),
          covariate.labels=c("D","T","D x T","Forest","OWQIt-1"),
          column.labels=c("Optimal bandwidth",
                          "One-year bandwidth",
                          "Half-year bandwidth",
                          "Full sample"),
          dep.var.labels.include = FALSE,
          intercept.bottom=TRUE,
          order = c("D","diff_dates","x_right","forest","lag_wqi"),
          keep.stat=c("rsq","n"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          align=TRUE,no.space=TRUE)

####Figure 3####
plot_figure3 <- df[df$kw > 0, ] %>%
  ungroup %>%
  mutate(`Treatment Status` = ifelse(diff_dates < 0, "Pre-OPSW", "Post-OPSW")) %>%
  ggplot(aes(x = diff_dates, y = wqi, group = `Treatment Status`, color = `Treatment Status`)) +
  
  geom_point(data = df[df$kw > 0, ] %>% 
               ungroup %>% 
               mutate(diff_dates = round(diff_dates, 3)) %>% 
               group_by(diff_dates) %>% 
               dplyr::summarize(wqi = mean(wqi),
                                num_obs = n(),
                                size_category = case_when(
                                  num_obs >= 10 & num_obs <= 20 ~ "Small", # Small for 10–20
                                  num_obs > 20 ~ "Large"                   # Large for >20
                                )),
             aes(x = diff_dates, y = wqi, size = size_category), 
             alpha = .6, inherit.aes = F, pch = 1) +
  
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 1),
              se = TRUE,
              size = 1,
              alpha = 0.8) +
  scale_color_manual(values = c("#00BFC4", "#F8766D")) +
  
  # Map size categories to specific sizes
  scale_size_manual(
    values = c("Small" = 2, "Large" = 5), # Map categories to sizes
    breaks = c("Small", "Large"),         # Legend breaks
    labels = c("10–20", ">20")            # Legend labels
  ) +
  
  geom_vline(xintercept = .5, linetype = "dashed") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5, reverse = TRUE),
         size = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(size = 'Number of Observations',
       x = "Days from OPSW Implementation", y = "Oregon Water Quality Index",
       title = "RDiT Estimate of the Legislation Effect\nwithin the Optimal Bandwidth") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key = element_blank(),
        legend.background = element_blank(),
        plot.title = element_textbox(
          hjust = 0.5, margin = margin(t = 5, b = 5))) +
  scale_x_continuous(breaks = c(-1000, -500, 0, 500, 1000))

plot_figure3
ggsave(file = "figure3.jpg", width = 7, height = 5, dpi = 666)
ggsave(file = "figure3.pdf", width = 7, height = 5, dpi = 666)

####Table A5####
df$diff_dates_placebo<-as.integer(difftime(df$date, "1998-01-01", units = "days"))

df <- df %>% 
  mutate(D_placebo = ifelse(diff_dates_placebo < 0, 0, 1),  # Create binary treatment indicator
         x_right_placebo = D_placebo * diff_dates_placebo)

# placebo bws
bws_placebo <- rdbwselect(df$wqi, df$diff_dates_placebo, p=1, c = 0,
                          kernel = "tri", bwselect="mserd", 
                          covs = df$forest + df$lag_wqi)

bws_placebo$bws

df$bandwidth_placebo <- bws_placebo$bws[1]

df$kw_placebo <- 1-(abs(0 - df$diff_dates_placebo))/df$bandwidth_placebo

df$kw_placebo[df$diff_dates_placebo > (0 + df$bandwidth_placebo) | 
                df$diff_dates_placebo < (0 - df$bandwidth_placebo)] <- 0

# regression
rd_placebo <- lm(wqi ~ D_placebo + diff_dates_placebo + x_right_placebo + forest + lag_wqi, data = df[df$kw_placebo > 0,])
summary(rd_placebo)
# cluster SE at watershed level
rd_placebo_cluster <- coeftest(rd_placebo, vcov. = vcovHC(rd_placebo, type = "HC1", cluster = df[df$kw_placebo > 0,]$huc8))
rd_placebo_cluster
mean_placebo=mean(df[df$kw_placebo > 0,]$wqi[df[df$kw_placebo > 0,]$diff_dates < 0])

#####Output####
stargazer(rd_placebo,
          style = "apsr",
          se=list(rd_placebo_cluster[,2]),
          p=list(rd_placebo_cluster[,4]),
          covariate.labels=c("D","T","D x T","Forest","OWQIt-1"),
          column.labels=c("Optimal bandwidth"),
          dep.var.labels.include = FALSE,
          intercept.bottom=TRUE,
          order = c("D","diff_dates","x_right","forest","lag_wqi"),
          keep.stat=c("rsq","n"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          align=TRUE,no.space=TRUE)

####Figure A1####
plot_figurea1.1 <- df[df$kw > 0, ] %>%
  ungroup %>%
  mutate(`Treatment Status` = ifelse(diff_dates < 0, "Pre-OPSW", "Post-OPSW")) %>%
  ggplot(aes(x = diff_dates, y = wqi, group = `Treatment Status`, color = `Treatment Status`)) +
  
  geom_point(data = df %>% 
               ungroup %>% 
               mutate(diff_dates = round(diff_dates, 3)) %>% 
               group_by(diff_dates) %>% 
               dplyr::summarize(wqi = mean(wqi),
                                num_obs = n(),
                                size_category = case_when(
                                  num_obs >= 10 & num_obs <= 20 ~ "Small", # Small for 10–20
                                  num_obs > 20 ~ "Large"                   # Large for >20
                                )),
             aes(x = diff_dates, y = wqi, size = size_category), 
             alpha = .6, inherit.aes = F, pch = 1) +
  
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 1),
              se = TRUE,
              size = 1,
              alpha = 0.8) +
  scale_color_manual(values = c("#00BFC4", "#F8766D")) +
  
  # Map size categories to specific sizes
  scale_size_manual(
    values = c("Small" = 2, "Large" = 5), # Map categories to sizes
    breaks = c("Small", "Large"),         # Legend breaks
    labels = c("10–20", ">20")            # Legend labels
  ) +
  
  geom_vline(xintercept = .5, linetype = "dashed") +
  geom_vline(xintercept = -1112, linetype = 1, color = "red") +
  geom_vline(xintercept = 1112, linetype = 1, color = "red") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5, reverse = TRUE),
         size = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(size = 'Number of Observations',
       x = "Days from OPSW Implementation", y = "Oregon Water Quality Index",
       title = "RDiT Estimate of the Legislation Effect\nwithin the Optimal Bandwidth") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key = element_blank(),
        legend.background = element_blank(),
        plot.title = element_textbox(
          hjust = 0.5, margin = margin(t = 5, b = 5))) +
  scale_x_continuous(breaks = c(-5000,-1112,0,1112,5000,7500))
plot_figurea1.1


plot_figurea1.2 <- df[df$diff_dates<=365 & df$diff_dates>=-365,] %>%
  ungroup %>%
  mutate(`Treatment Status` = ifelse(diff_dates < 0, "Pre-OPSW", "Post-OPSW")) %>%
  ggplot(aes(x = diff_dates, y = wqi, group = `Treatment Status`, color = `Treatment Status`)) +
  
  geom_point(data = df[df$diff_dates<=365 & df$diff_dates>=-365,] %>% 
               ungroup %>% 
               mutate(diff_dates = round(diff_dates, 3)) %>% 
               group_by(diff_dates) %>% 
               dplyr::summarize(wqi = mean(wqi),
                                num_obs = n(),
                                size_category = case_when(
                                  num_obs >= 10 & num_obs <= 20 ~ "Small", # Small for 10–20
                                  num_obs > 20 ~ "Large"                   # Large for >20
                                )),
             aes(x = diff_dates, y = wqi, size = size_category), 
             alpha = .6, inherit.aes = F, pch = 1) +
  
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 1),
              se = TRUE,
              size = 1,
              alpha = 0.8) +
  scale_color_manual(values = c("#00BFC4", "#F8766D")) +
  
  # Map size categories to specific sizes
  scale_size_manual(
    values = c("Small" = 2, "Large" = 5), # Map categories to sizes
    breaks = c("Small", "Large"),         # Legend breaks
    labels = c("10–20", ">20")            # Legend labels
  ) +
  
  geom_vline(xintercept = .5, linetype = "dashed") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5, reverse = TRUE),
         size = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(size = 'Number of Observations',
       x = "Days from OPSW Implementation", y = "Oregon Water Quality Index",
       title = "RDiT Estimate of the Legislation Effect\nwithin the One-Year Bandwidth") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key = element_blank(),
        legend.background = element_blank(),
        plot.title = element_textbox(
          hjust = 0.5, margin = margin(t = 5, b = 5))) +
  scale_x_continuous(breaks = c(-365,-180,0,180,365))
plot_figurea1.2

plot_figurea1.3 <- df[df$diff_dates<=180 & df$diff_dates>=-180,] %>%
  ungroup %>%
  mutate(`Treatment Status` = ifelse(diff_dates < 0, "Pre-OPSW", "Post-OPSW")) %>%
  ggplot(aes(x = diff_dates, y = wqi, group = `Treatment Status`, color = `Treatment Status`)) +
  
  geom_point(data = df[df$diff_dates<=180 & df$diff_dates>=-180,] %>% 
               ungroup %>% 
               mutate(diff_dates = round(diff_dates, 3)) %>% 
               group_by(diff_dates) %>% 
               dplyr::summarize(wqi = mean(wqi),
                                num_obs = n(),
                                size_category = case_when(
                                  num_obs >= 10 & num_obs <= 20 ~ "Small", # Small for 10–20
                                  num_obs > 20 ~ "Large"                   # Large for >20
                                )),
             aes(x = diff_dates, y = wqi, size = size_category), 
             alpha = .6, inherit.aes = F, pch = 1) +
  
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 1),
              se = TRUE,
              size = 1,
              alpha = 0.8) +
  scale_color_manual(values = c("#00BFC4", "#F8766D")) +
  
  # Map size categories to specific sizes
  scale_size_manual(
    values = c("Small" = 2, "Large" = 5), # Map categories to sizes
    breaks = c("Small", "Large"),         # Legend breaks
    labels = c("10–20", ">20")            # Legend labels
  ) +
  
  geom_vline(xintercept = .5, linetype = "dashed") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5, reverse = TRUE),
         size = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(size = 'Number of Observations',
       x = "Days from OPSW Implementation", y = "Oregon Water Quality Index",
       title = "RDiT Estimate of the Legislation Effect\nwithin the Half-Year Bandwidth") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key = element_blank(),
        legend.background = element_blank(),
        plot.title = element_textbox(
          hjust = 0.5, margin = margin(t = 5, b = 5))) +
  scale_x_continuous(breaks = c(-180,-90,0,90,180))
plot_figurea1.3

plot_figurea1.4 <- df %>%
  ungroup %>%
  mutate(`Treatment Status` = ifelse(diff_dates < 0, "Pre-OPSW", "Post-OPSW")) %>%
  ggplot(aes(x = diff_dates, y = wqi, group = `Treatment Status`, color = `Treatment Status`)) +
  
  geom_point(data = df %>% 
               ungroup %>% 
               mutate(diff_dates = round(diff_dates, 3)) %>% 
               group_by(diff_dates) %>% 
               dplyr::summarize(wqi = mean(wqi),
                                num_obs = n(),
                                size_category = case_when(
                                  num_obs >= 10 & num_obs <= 20 ~ "Small", # Small for 10–20
                                  num_obs > 20 ~ "Large"                   # Large for >20
                                )),
             aes(x = diff_dates, y = wqi, size = size_category), 
             alpha = .6, inherit.aes = F, pch = 1) +
  
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 1),
              se = TRUE,
              size = 1,
              alpha = 0.8) +
  scale_color_manual(values = c("#00BFC4", "#F8766D")) +
  
  # Map size categories to specific sizes
  scale_size_manual(
    values = c("Small" = 2, "Large" = 5), # Map categories to sizes
    breaks = c("Small", "Large"),         # Legend breaks
    labels = c("10–20", ">20")            # Legend labels
  ) +
  
  geom_vline(xintercept = .5, linetype = "dashed") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5, reverse = TRUE),
         size = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(size = 'Number of Observations',
       x = "Days from OPSW Implementation", y = "Oregon Water Quality Index",
       title = "RDiT Estimate of the Legislation Effect\nFull Sample: 1980-2021") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key = element_blank(),
        legend.background = element_blank(),
        plot.title = element_textbox(
          hjust = 0.5, margin = margin(t = 5, b = 5))) +
  scale_x_continuous(breaks = c(-5000,-2500,0,2500,5000,7500))
plot_figurea1.4

# combine
plot_figurea1<-ggarrange(plot_figurea1.1, plot_figurea1.2, plot_figurea1.3, plot_figurea1.4,
                   ncol = 2, nrow = 2)

plot_figurea1<-plot_grid(plot_figurea1)
plot_figurea1
ggsave(file = "figurea1.jpg", width = 12, height = 10, dpi = 666)
ggsave(file = "figurea1.pdf", width = 12, height = 10, dpi = 666)

####Figure A2####
plot_figurea2 <- df[df$kw_placebo > 0,] %>%
  ungroup %>%
  mutate(`Treatment Status` = ifelse(diff_dates_placebo < 0, "Pre-1998", "Post-1998")) %>%
  ggplot(aes(x = diff_dates_placebo, y = wqi, group = `Treatment Status`, color = `Treatment Status`)) +
  
  geom_point(data = df[df$kw_placebo > 0,] %>% 
               ungroup %>% 
               mutate(diff_dates_placebo = round(diff_dates_placebo, 3)) %>% 
               group_by(diff_dates_placebo) %>% 
               dplyr::summarize(wqi = mean(wqi),
                                num_obs = n(),
                                size_category = case_when(
                                  num_obs >= 10 & num_obs <= 20 ~ "Small", # Small for 10–20
                                  num_obs > 20 ~ "Large"                   # Large for >20
                                )),
             aes(x = diff_dates_placebo, y = wqi, size = size_category), 
             alpha = .6, inherit.aes = F, pch = 1) +
  
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 1),
              se = TRUE,
              size = 1,
              alpha = 0.8) +
  scale_color_manual(values = c("#00BFC4", "#F8766D")) +
  
  # Map size categories to specific sizes
  scale_size_manual(
    values = c("Small" = 2, "Large" = 5), # Map categories to sizes
    breaks = c("Small", "Large"),         # Legend breaks
    labels = c("10–20", ">20")            # Legend labels
  ) +
  
  geom_vline(xintercept = .5, linetype = "dashed") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5, reverse = TRUE),
         size = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(size = 'Number of Observations',
       x = "Days from Placebo OPSW Implementation", y = "Oregon Water Quality Index",
       title = "RDiT Placebo Estimate (1998) of the Legislation Effect") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.key = element_blank(),
        legend.background = element_blank(),
        plot.title = element_textbox(
          hjust = 0.5, margin = margin(t = 5, b = 5))) +
  scale_x_continuous(breaks = c(-1000, -500, 0, 500, 1000))
plot_figurea2
ggsave(file = "figurea2.jpg", width = 7, height = 5, dpi = 666)
ggsave(file = "figurea2.pdf", width = 7, height = 5, dpi = 666)

####Figure A3####
density <- rddensity(df$diff_dates, c = 0, p = 1)
summary(density)
rdplotdensity(density, df$diff_dates)
density.plot<-rdplotdensity(density, df$diff_dates,
                              title = "Density of Observations Near the Cutoff Date",
                              xlabel = "Days from OPSW Implementation",
                              ylabel = "Density",
                              lcol = c("#F8766D","#00BFC4"),
                              CIcol = c("#F8766D","#00BFC4"),
                              histFillCol = "grey40")
print(density.plot$Estplot)
plot_figurea3 <- density.plot$Estplot

plot_figurea3 <- plot_figurea3 + 
  ggplot2::geom_vline(xintercept=.5, linetype="dashed") + 
  ggplot2::theme_bw() +
  ggplot2::theme(legend.position = "none",
                 plot.title = element_textbox(
                   hjust = 0.5, margin = margin(t = 5, b = 5)))
plot_figurea3
ggsave(file = "figurea3.jpg", width = 7, height = 5, dpi = 666)
ggsave(file = "figurea3.pdf", width = 7, height = 5, dpi = 666)
